## Replication Code for 
## "The Impact of University Attendance on Partisanship"
## Apfeld, Coman, Gerring and Jessee
## Political Science Research and Methods

## NOTE: the following packages must be installed
##       prior to running the code below
##       rio, car, rdrobust, ivdesc, AER

# figures are output as pdf files
# the sink() function is used to write table output to .txt files

# reading in survey data
library(rio)
DAT <- import("final_data.dta")

# recoding GAL_TAN variable (flipping so 10=lib, 0=con)
# to get pollib variable
DAT$pollib <- 10-DAT$GAL_TAN

# creating recoded indicator variables
library(car) # to get recode()

# recoding childhood SES recall question
DAT$childhoodSES <- recode(DAT$p22, "'Lower class'=1; 'Working class'=2; 'Lower middle class'=3; 'Upper middle class'=4; 'Upper class'=5; else=NA")
DAT$fatheredu <- DAT$p20

# creating respondent's partisanship variable
DAT$ownparty <- ifelse(DAT$q14=="", NA, DAT$q14)

# creating father's partisanship variable
DAT$fparty <- ifelse(DAT$p21=="", NA, DAT$p21)

# creating variable for deviation from father's party
DAT$dev.party <- DAT$ownparty!=DAT$fparty

# creating variables for difference
# between respondent and father's 
# ideology (measured 3 ways)
DAT$dif_GAL_TAN <- DAT$GAL_TAN - DAT$f_GAL_TAN
DAT$dif_ECON_LR <- DAT$ECON_LR - DAT$f_ECON_LR
DAT$dif_left_right <- DAT$left_right - DAT$f_left_right

# creating full dataset
DAT.WITH2019 <- DAT
DAT.ONLY2019 <- subset(DAT.WITH2019, GRADYEAR==2019)
# subsetting data to drop GRADYEAR==2019
DAT <- subset(DAT, GRADYEAR!=2019)

###############################################
## REPLICATING MAIN PAPER TABLES AND FIGURES ##
###############################################

# Assessing compliance
# How what percent of people who "passed" the Bac went to college? 
# ...and what percent who "failed" the Bac went to college?
# from paper: "the proportions attending university above and  
#              below the cutoff are 0.86 and 0.21, respectively, 
#              a difference of 0.65."
prop.table(table(DAT$average >= 6, DAT$p6), margin=1)
# note: row variable is exam passage, 
#       column variable is university attendance

# Political Liberalism Analyses (H1, `pollib`)
# These analyses use the `pollib` variable 
# (which is flipped of `GAL_TAN`) that assigns 
# liberal/conservative scores based on the party 
# the respondent says they would vote for 
# if the election were held today


##############
## Figure 1 ##
##############
pdf("Figure1.pdf",
    height=4.5, width=9)
par(mfrow=c(1,2), mar=c(5,4,2,2))
# plotting proportion attending university 
# at each value of bac score
plot(tapply(DAT$p6, DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)),
     cex=table(DAT$average)/15, pch=20, bty="n",
     xlab="Bac Score", ylab="Proportion attending university")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$p6, DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
#
plot(tapply(DAT$pollib, DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)), 
     cex=table(DAT$average)/15,
     pch=20, bty="n", 
     xlab="Bac Score", ylab="Average Cultural Ideology")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$pollib, DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
# Point sizes are proportional to the number of observations 
dev.off()

#############
## TABLE 2 ##
#############
sink(file="Table2.txt")
library(rdrobust)
clustervar <- DAT$average
rd.main <- rdrobust(y=DAT$pollib, x=DAT$average, 
                    c=6, h=.2, fuzzy=DAT$p6,
                    vce = "hc0", cluster = clustervar)
summary(rd.main)
sink()

#############
## TABLE 3 ##
#############
## LOCAL RANDOMIZATION (DIFF IN MEANS)
## RD FOR PRE-TREATMENT COVARIATES
## father's education
sink(file="Table3.txt")
lm.fatheredu <- lm(fatheredu ~ average>=6,
                   data=DAT)
summary(lm.fatheredu)
confint(lm.fatheredu)
##
lm.childhoodSES <- lm(childhoodSES ~ average>=6,
                      data=DAT)
summary(lm.childhoodSES)
confint(lm.childhoodSES)
## humanities track
lm.humanist <- lm(humanist ~ average>=6,
                  data=DAT)
summary(lm.humanist)
confint(lm.humanist)
## urban HS
lm.HS_urban <- lm(HS_urban=="MUNICIPIU" ~ average>=6,
                  data=DAT)
summary(lm.HS_urban)
confint(lm.HS_urban)
#
## CONTINUITY-BASED RD FOR PRE-TREATMENT COVARIATES
## Father's Education
clustervar <- DAT$average
rd.fatheredu <- rdrobust(y=DAT$fatheredu, x=DAT$average, 
                         c=6, h=.2,
                         vce = "hc0", cluster = clustervar)
summary(rd.fatheredu)
## Childhood SES
clustervar <- DAT$average
rd.childhoodSES <- rdrobust(y=DAT$childhoodSES, x=DAT$average, 
                            c=6, h=.2,
                            vce = "hc0", cluster = clustervar)
summary(rd.childhoodSES)
## Humanities
clustervar <- DAT$average
rd.humanist <- rdrobust(y=DAT$humanist, x=DAT$average, 
                        c=6, h=.2,
                        vce = "hc0", cluster = clustervar)
summary(rd.humanist)
## HS urban
clustervar <- DAT$average
rd.HS_urban <- rdrobust(y=DAT$HS_urban=="MUNICIPIU", x=DAT$average, 
                        c=6, h=.2,
                        vce = "hc0", cluster = clustervar)
summary(rd.HS_urban)
sink()



#############
## TABLE 4 ##
#############
sink(file="Table4.txt")
clustervar <- DAT$average
Z <- cbind(DAT$fatheredu, DAT$childhoodSES,
           DAT$humanist, DAT$HS_urban=="MUNICIPIU")
colnames(Z) <- c("fatheredu", "childhoodSES",
                 "humanist", "HS_urban")
covs.missing <- is.na(DAT$fatheredu) | is.na(DAT$childhoodSES) |
  is.na(DAT$humanist) | is.na(DAT$HS_urban) |
  is.na(DAT$p6) | is.na(DAT$pollib) | is.na(DAT$average)
#
rd.main.covs <- rdrobust(y=DAT$pollib[!covs.missing], x=DAT$average[!covs.missing], 
                         covs=Z[!covs.missing, ],
                         c=6, h=.2, fuzzy=DAT$p6[!covs.missing],
                         vce = "hc0", cluster = clustervar[!covs.missing])
summary(rd.main.covs)
sink()



#############################################
## REPLICATING APPENDIX TABLES AND FIGURES ##
#############################################

## NOTE: Figures B1 and B2 are replicated using a separate code file
##       see readme file for more information

##############
## TABLE B1 ##
##############
# NOTE: Population summary statistics are in another file
#       see readme for more information
sink(file="TableB1_firstcolumn.txt")
# gender
prop.table(table(DAT.WITH2019$p11))
# single (never married)
prop.table(table(DAT.WITH2019$p12=="Single"))
# one or more children
prop.table(table(DAT.WITH2019$p13>=1))
# employed
prop.table(table(DAT.WITH2019$p14_1=="Yes"))
# Romanian ethnicity
prop.table(table(DAT.WITH2019$p23[DAT.WITH2019$p23!=""]))
# father's education level (4 cat)
# 4 category father's education
DAT.WITH2019$fatheredu4 <- c(1,1,1,1,1,2,3,4,4,4)[DAT.WITH2019$fatheredu]
prop.table(table(DAT.WITH2019$fatheredu4))
# childhood SES
prop.table(table(DAT.WITH2019$childhoodSES))
# current SES
prop.table(table(DAT.WITH2019$p15[DAT.WITH2019$p15!=""]))
# age
prop.table(table(2020-DAT.WITH2019$p17))
sink()

###############
## FIGURE B3 ##
###############
library(ivdesc)
#
DAT$passbac <- as.numeric(DAT$average>=6)
DAT$female <- ifelse(DAT$p11=="Female", 1, ifelse(DAT$p11=="Male", 0, NA))
#
childhoodSES.compliers <- ivdesc(X=DAT$childhoodSES,
                                 D=DAT$p6,
                                 Z=DAT$passbac,
                                 bootn=2000)
childhoodSES.compliers
#
fatheredu.compliers <- ivdesc(X=DAT$fatheredu,
                              D=DAT$p6,
                              Z=DAT$passbac,
                              bootn=2000)
fatheredu.compliers
#
female.compliers <- ivdesc(X=DAT$female,
                           D=DAT$p6,
                           Z=DAT$passbac,
                           bootn=2000)
female.compliers
#
urban.compliers <- ivdesc(X=DAT$urban_residence=="MUNICIPIU",
                          D=DAT$p6,
                          Z=DAT$passbac,
                          bootn=2000)
urban.compliers
#
gradyear.compliers <- ivdesc(X=DAT$GRADYEAR,
                             D=DAT$p6,
                             Z=DAT$passbac,
                             bootn=2000)
gradyear.compliers
#
# age.compliers <- ivdesc(X=2020-DAT$GRADYEAR,
#                         D=DAT$p6,
#                         Z=DAT$passbac,
#                         bootn=2000)
# age.compliers
# 
# making tables of complier estimates and CIs
childhoodSES.ctable <- cbind(childhoodSES.compliers$mu,
                             childhoodSES.compliers$mu_se,
                             childhoodSES.compliers$mu + 
                               qnorm(.025)*childhoodSES.compliers$mu_se,
                             childhoodSES.compliers$mu + 
                               qnorm(.975)*childhoodSES.compliers$mu_se)
#
fatheredu.ctable <- cbind(fatheredu.compliers$mu,
                          fatheredu.compliers$mu_se,
                          fatheredu.compliers$mu + 
                            qnorm(.025)*fatheredu.compliers$mu_se,
                          fatheredu.compliers$mu + 
                            qnorm(.975)*fatheredu.compliers$mu_se)
#
female.ctable <- cbind(female.compliers$mu,
                       female.compliers$mu_se,
                       female.compliers$mu + 
                         qnorm(.025)*female.compliers$mu_se,
                       female.compliers$mu + 
                         qnorm(.975)*female.compliers$mu_se)
#
urban.ctable <- cbind(urban.compliers$mu,
                      urban.compliers$mu_se,
                      urban.compliers$mu + 
                        qnorm(.025)*urban.compliers$mu_se,
                      urban.compliers$mu + 
                        qnorm(.975)*urban.compliers$mu_se)
#
gradyear.ctable <- cbind(gradyear.compliers$mu,
                         gradyear.compliers$mu_se,
                         gradyear.compliers$mu + 
                           qnorm(.025)*gradyear.compliers$mu_se,
                         gradyear.compliers$mu + 
                           qnorm(.975)*gradyear.compliers$mu_se)
#
# age.ctable <- cbind(age.compliers$mu,
#                     age.compliers$mu_se,
#                     age.compliers$mu + 
#                       qnorm(.025)*age.compliers$mu_se,
#                     age.compliers$mu + 
#                       qnorm(.975)*age.compliers$mu_se)
# #
# humanist.ctable <- cbind(humanist.compliers$mu,
#                          humanist.compliers$mu_se,
#                          humanist.compliers$mu + 
#                            qnorm(.025)*humanist.compliers$mu_se,
#                          humanist.compliers$mu + 
#                            qnorm(.975)*humanist.compliers$mu_se)
#
proportion.ctable <- cbind(childhoodSES.compliers$pi,
                           childhoodSES.compliers$pi_se,
                           childhoodSES.compliers$pi + 
                             qnorm(.025)*childhoodSES.compliers$pi_se,
                           childhoodSES.compliers$pi + 
                             qnorm(.975)*childhoodSES.compliers$pi_se)
#
# MAKING FIGURE OF COMPLIERS
pdf("FigureB3.pdf",
    height=8, width=7)
#
par(mfrow=c(3,2), mar=c(4,3,3,3))
#
plot(-(1:nrow(proportion.ctable)) ~ proportion.ctable[,1],
     xlim=c(0,1), ylim=c(-nrow(proportion.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="Proportion of Types [0-1]")
for(i in 1:nrow(childhoodSES.ctable)){
  segments(proportion.ctable[i,3], -i, proportion.ctable[i,4], -i,
           lwd=2)
  text(proportion.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
#
plot(-(1:length(childhoodSES.compliers$mu)) ~ childhoodSES.compliers$mu,
     xlim=range(childhoodSES.ctable[,3:4]), 
     ylim=c(-nrow(childhoodSES.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="Childhood SES [1-5]")
for(i in 1:nrow(childhoodSES.ctable)){
  segments(childhoodSES.ctable[i,3], -i, childhoodSES.ctable[i,4], -i,
           lwd=2)
  text(childhoodSES.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
#
plot(-(1:length(fatheredu.compliers$mu)) ~ fatheredu.compliers$mu,
     xlim=range(fatheredu.ctable[,3:4]), 
     ylim=c(-nrow(fatheredu.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="Father's Edu. [1-11]")
for(i in 1:nrow(fatheredu.ctable)){
  segments(fatheredu.ctable[i,3], -i, fatheredu.ctable[i,4], -i,
           lwd=2)
  text(fatheredu.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
#
plot(-(1:length(female.compliers$mu)) ~ female.compliers$mu,
     xlim=range(female.ctable[,3:4]), 
     ylim=c(-nrow(female.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="Female [0,1]")
for(i in 1:nrow(female.ctable)){
  segments(female.ctable[i,3], -i, female.ctable[i,4], -i,
           lwd=2)
  text(female.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
#
plot(-(1:length(urban.compliers$mu)) ~ urban.compliers$mu,
     xlim=range(urban.ctable[,3:4]), 
     ylim=c(-nrow(urban.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="Urban [0,1]")
for(i in 1:nrow(urban.ctable)){
  segments(urban.ctable[i,3], -i, urban.ctable[i,4], -i,
           lwd=2)
  text(urban.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
#
plot(-(1:length(gradyear.compliers$mu)) ~ gradyear.compliers$mu,
     xlim=range(gradyear.ctable[,3:4]), 
     ylim=c(-nrow(gradyear.ctable)-.5, -.5),
     pch=19, yaxt="n", ylab="", xlab="", bty="n",
     main="HS Grad Year [2015-2018]")
for(i in 1:nrow(gradyear.ctable)){
  segments(gradyear.ctable[i,3], -i, gradyear.ctable[i,4], -i,
           lwd=2)
  text(gradyear.ctable[i,1], -i+.3,
       c("all", "co", "nt", "at")[i])
}
dev.off()
## NOTE: Table B2 replication code in separate file
##       see readme file


###############
## FIGURE B4 ##
###############
pdf("FigureB4.pdf",
    height=7, width=7)
par(mfrow=c(2,2), mar=c(5,4,2,2))
# plotting proportion attending university 
# at each value of bac score
plot(tapply(DAT$fatheredu, DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)),
     cex=table(DAT$average)/15, pch=20, bty="n", ylim=c(6,8),
     xlab="Bac Score", ylab="Father's Education")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$fatheredu, DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
#
plot(tapply(DAT$childhoodSES, DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)),
     cex=table(DAT$average)/15, pch=20, bty="n", ylim=c(2,4),
     xlab="Bac Score", ylab="Childhood SES")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$childhoodSES, DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
#
plot(tapply(DAT$humanist, DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)),
     cex=table(DAT$average)/15, pch=20, bty="n", ylim=c(0,1),
     xlab="Bac Score", ylab="Humanities/Social Science Track")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$humanist, DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
#
plot(tapply(DAT$HS_urban=="MUNICIPIU", DAT$average, mean, na.rm=TRUE) ~ sort(unique(DAT$average)),
     cex=table(DAT$average)/15, pch=20, bty="n", ylim=c(0,1),
     xlab="Bac Score", ylab="Urban High School")
abline(v=6, lty=3, col="red")
abline(h=tapply(DAT$HS_urban=="MUNICIPIU", DAT$average>=6, mean, na.rm=TRUE), lty=3, col="gray")
dev.off()

###############
## FIGURE C1 ##
###############
bandwidth.radius <- c(.075,.1, .125,.15, .175,.2)
bandwidth.est.table.pollib <- matrix(NA, nrow=length(bandwidth.radius), ncol=3)
rownames(bandwidth.est.table.pollib) <- bandwidth.radius
colnames(bandwidth.est.table.pollib) <- c("tau_hat", "ci_lower", "ci_upper")
for(i in 1:length(bandwidth.radius)){
  foo.pollib <- rdrobust(y=DAT$pollib,
                         x=DAT$average, 
                         c=6, h=bandwidth.radius[i], 
                         fuzzy=DAT$p6,
                         vce = "hc0", 
                         cluster = DAT$average)
  bandwidth.est.table.pollib[i, ] <- c(foo.pollib$coef[1,], foo.pollib$ci[3,])
}
#
pdf("FigureC1.pdf",
    height=5, width=8)
plot(bandwidth.radius, bandwidth.est.table.pollib[,1], 
     xlab="", 
     ylab="Estimated effect of university attendance",
     bty="n", ylim=c(-3,3), main="DV: Political Liberalism",
     pch=20, col="red", xaxt="n")
axis(1, at=bandwidth.radius, labels=substr(bandwidth.radius, 2, 5), las=2)
for(i in 1:length(bandwidth.radius)){
  segments(bandwidth.radius[i], bandwidth.est.table.pollib[i,2], 
           bandwidth.radius[i], bandwidth.est.table.pollib[i,3])
}
abline(h=0, col="gray", lty=3)
dev.off()

###############
## FIGURE C2 ##
###############
donut.radius <- c(0, .02, .04, .06, .08, .1)
donut.est.table.pollib <- matrix(NA, nrow=length(donut.radius), ncol=3)
rownames(donut.est.table.pollib) <- donut.radius
colnames(donut.est.table.pollib) <- c("tau_hat", "ci_lower", "ci_upper")
#
for(i in 1:length(donut.radius)){
  foo.pollib <- rdrobust(y=DAT$pollib[DAT$average < 6 - donut.radius[i] |
                                        DAT$average >= 6 + donut.radius[i]],
                         x=DAT$average[DAT$average < 6 - donut.radius[i] | 
                                         DAT$average >= 6 + donut.radius[i]], 
                         c=6, h=.2, 
                         fuzzy=DAT$p6[DAT$average < 6 - donut.radius[i] | 
                                        DAT$average >= 6 + donut.radius[i]],
                         vce = "hc0", 
                         cluster = DAT$average[DAT$average < 6 - donut.radius[i] | 
                                                 DAT$average >= 6 + donut.radius[i]])
  donut.est.table.pollib[i, ] <- c(foo.pollib$coef[1,], foo.pollib$ci[3,])
}
#
pdf("FigureC2.pdf",
    height=5, width=7)
plot(donut.radius, donut.est.table.pollib[,1], 
     xlab="Donut Hole Radius", ylab="Estimated effect of university attendance",
     main="DV: Political Liberalism",
     bty="n", ylim=c(-6,8), #range(donut.est.table.pollib), 
     pch=20, col="red")
for(i in 1:length(donut.radius)){
  segments(donut.radius[i], donut.est.table.pollib[i,2], 
           donut.radius[i], donut.est.table.pollib[i,3])
}
abline(h=0, col="gray", lty=3)
dev.off()

##############
## TABLE C3 ##
##############
sink(file="TableC3.txt")
## first stage results 
cat("First Stage (all observations)")
cat("\n")
lm.firststage <- lm(p6 ~ average>=6, 
                    data=subset(DAT,
                                !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.firststage)
confint(lm.firststage)
## first stage results 
cat("\n")
cat("\n")
cat("\n")
cat("First Stage (.15 bandwidth)")
cat("\n")
lm.firststage.15 <- lm(p6 ~ average>=6, 
                       data=subset(DAT, abs(average-6)<=.15 &
                                     !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.firststage.15)
confint(lm.firststage.15)
## first stage results 
cat("\n")
cat("\n")
cat("\n")
cat("First Stage (.1 bandwidth)")
cat("\n")
lm.firststage.1 <- lm(p6 ~ average>=6, 
                      data=subset(DAT, abs(average-6)<=.1 &
                                    !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.firststage.1)
confint(lm.firststage.1)
## first stage results .05
cat("\n")
cat("\n")
cat("\n")
cat("First Stage (.05 bandwidth)")
cat("\n")
lm.firststage.05 <- lm(p6 ~ average>=6, 
                       data=subset(DAT, abs(average-6)<=.05 & 
                                     !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.firststage.05)
confint(lm.firststage.05)
## first stage closest 
cat("\n")
cat("\n")
cat("\n")
cat("First Stage (minimum bandwidth)")
cat("\n")
lm.firststage.closest <- lm(p6 ~ average>=6, 
                            data=subset(DAT, (average==6 | average==5.983333) &
                                          !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.firststage.closest)
confint(lm.firststage.closest)
#
## ITT results 
cat("\n")
cat("\n")
cat("\n")
cat("ITT (all observations)")
cat("\n")
lm.itt <- lm(pollib ~ average>=6, 
             data=subset(DAT, !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.itt)
confint(lm.itt)
## ITT results .15
cat("\n")
cat("\n")
cat("\n")
cat("ITT (.15 bandwidth)")
cat("\n")
lm.itt.15 <- lm(pollib ~ average>=6, 
                data=subset(DAT, abs(average-6)<=.15 & 
                              !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.itt.15)
confint(lm.itt.15)
## ITT results .10
cat("\n")
cat("\n")
cat("\n")
cat("ITT (.1 bandwidth)")
cat("\n")
lm.itt.10 <- lm(pollib ~ average>=6, 
                data=subset(DAT, abs(average-6)<=.10 & 
                              !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.itt.10)
confint(lm.itt.10)
## ITT results .05
cat("\n")
cat("\n")
cat("\n")
cat("ITT (.05 bandwidth)")
cat("\n")
lm.itt.05 <- lm(pollib ~ average>=6, 
                data=subset(DAT, abs(average-6)<=.05 & 
                              !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.itt.05)
confint(lm.itt.05)
## ITT closest 
cat("\n")
cat("\n")
cat("\n")
cat("ITT (minimum bandwidth)")
cat("\n")
lm.itt.closest <- lm(pollib ~ average>=6, 
                     data=subset(DAT, (average==6 | average==5.983333) & 
                                   !is.na(average) & !is.na(p6) & !is.na(pollib)))
summary(lm.itt.closest)
confint(lm.itt.closest)
#
## Effect of Univ Attendance (TSLS results)
library(AER)
cat("\n")
cat("\n")
cat("\n")
cat("Effect of University (all observations)")
cat("\n")
lm.tsls <- ivreg(pollib ~ p6 | average>=6,
                 data=DAT)
summary(lm.tsls)
confint(lm.tsls)
# tsls .15
cat("\n")
cat("\n")
cat("\n")
cat("Effect of University (.15 bandwidth)")
cat("\n")
lm.tsls.15 <- ivreg(pollib ~ p6 | average>=6,
                    data=subset(DAT, abs(average-6)<=.15))
summary(lm.tsls.15)
confint(lm.tsls.15)
# tsls .1
cat("\n")
cat("\n")
cat("\n")
cat("Effect of University (.1 bandwidth)")
cat("\n")
lm.tsls.1 <- ivreg(pollib ~ p6 | average>=6,
                   data=subset(DAT, abs(average-6)<=.1))
summary(lm.tsls.1)
confint(lm.tsls.1)
# tsls .05
cat("\n")
cat("\n")
cat("\n")
cat("Effect of University (.05 bandwidth)")
cat("\n")
lm.tsls.05 <- ivreg(pollib ~ p6 | average>=6,
                    data=subset(DAT, abs(average-6)<=.05))
summary(lm.tsls.05)
confint(lm.tsls.05)
# tsls closest
cat("\n")
cat("\n")
cat("\n")
cat("Effect of University (minimum bandwidth)")
cat("\n")
lm.tsls.closest <- ivreg(pollib ~ p6 | average>=6,
                         data=subset(DAT, average==6 | average==5.983333))
summary(lm.tsls.closest)
confint(lm.tsls.closest)
#
cat("\n")
cat("\n")
cat("\n")
cat("N (all observations)")
cat("\n")
sum(!is.na(DAT$average) & !is.na(DAT$p6))
cat("\n")
cat("N_above and N_below (all observations)")
cat("\n")
table(DAT$average[!is.na(DAT$p6)]>=6)
cat("\n")
cat("\n")
cat("N (.15 bandwidth)")
cat("\n")
sum((!is.na(DAT$average) & !is.na(DAT$p6))[abs(DAT$average-6)<=.15],
    na.rm=TRUE)
cat("\n")
cat("N_above and N_below (.15 bandwidth)")
cat("\n")
table(DAT$average[!is.na(DAT$p6) & abs(DAT$average-6)<=.15]>=6)
cat("\n")
cat("\n")
cat("N (.1 bandwidth)")
cat("\n")
sum((!is.na(DAT$average) & !is.na(DAT$p6))[abs(DAT$average-6)<=.1],
    na.rm=TRUE)
cat("\n")
cat("N_above and N_below (.1 bandwidth)")
cat("\n")
table(DAT$average[!is.na(DAT$p6) & abs(DAT$average-6)<=.1]>=6)
cat("\n")
cat("\n")
cat("N (.05 bandwidth)")
cat("\n")
sum((!is.na(DAT$average) & !is.na(DAT$p6))[abs(DAT$average-6)<=.05],
    na.rm=TRUE)
cat("\n")
cat("N_above and N_below (.05 bandwidth)")
cat("\n")
table(DAT$average[!is.na(DAT$p6) & abs(DAT$average-6)<=.05]>=6)
cat("\n")
cat("\n")
cat("N (minimum bandwidth)")
cat("\n")
sum((!is.na(DAT$average) & !is.na(DAT$p6))[DAT$average==6 | DAT$average==5.983333],
    na.rm=TRUE)
cat("\n")
cat("N_above and N_below (minimum bandwidth)")
cat("\n")
table(DAT$average[!is.na(DAT$p6) & (DAT$average==6 | DAT$average==5.983333)]>=6)
sink()

## Figure D1 replication code in another file
## see readme for more information


###############
## FIGURE E1 ##
###############
year.est.table <- matrix(NA, nrow=5, ncol=3)
rownames(year.est.table) <- 2015:2019
colnames(year.est.table) <- c("tau_hat", "ci_lower", "ci_upper")

for(i in 1:5){
  subyear <- (2015:2019)[i]
  foo <- rdrobust(y=DAT.WITH2019$pollib[DAT.WITH2019$GRADYEAR==subyear],
                  x=DAT.WITH2019$average[DAT.WITH2019$GRADYEAR==subyear], 
                  c=6, h=.2, fuzzy=DAT.WITH2019$p6[DAT.WITH2019$GRADYEAR==subyear],
                  vce = "hc0", cluster = DAT.WITH2019$average[DAT.WITH2019$GRADYEAR==subyear])
  year.est.table[i, ] <- c(foo$coef[1,], foo$ci[3,])
}
pdf("FigureE1.pdf",
    height=5, width=7)
plot(2015:2019, year.est.table[,1], 
     xlab="Year of Graduation", xaxt="n",
     ylab="Estimated effect of university attendance",
     bty="n", ylim=range(year.est.table), pch=20, col="red")
axis(1, at=2015:2019, labels=2015:2019)
for(i in 1:5){
  segments((2015:2019)[i], year.est.table[i,2], (2015:2019)[i], year.est.table[i,3])
}
abline(h=0, col="gray", lty=3)
dev.off()

##############
## TABLE E1 ##
##############
sink(file="TableE1.txt")
rd.nonurban <- rdrobust(y=DAT$pollib[DAT$HS_urban!="MUNICIPIU"], 
                        x=DAT$average[DAT$HS_urban!="MUNICIPIU"], 
                        c=6, h=.2,
                        fuzzy=DAT$p6[DAT$HS_urban!="MUNICIPIU"],
                        vce = "hc0", 
                        cluster = clustervar[DAT$HS_urban!="MUNICIPIU"])
summary(rd.nonurban)
#
clustervar <- DAT$average
rd.urban <- rdrobust(y=DAT$pollib[DAT$HS_urban=="MUNICIPIU"], 
                     x=DAT$average[DAT$HS_urban=="MUNICIPIU"], 
                     c=6, h=.2,
                     fuzzy=DAT$p6[DAT$HS_urban=="MUNICIPIU"],
                     vce = "hc0", 
                     cluster = clustervar[DAT$HS_urban=="MUNICIPIU"])
summary(rd.urban)
sink()

##############
## TABLE E2 ##
##############
sink(file="TableE2.txt")
clustervar <- DAT$average
rd.science <- rdrobust(y=DAT$pollib[DAT$humanist==0], 
                       x=DAT$average[DAT$humanist==0], 
                       c=6, h=.2,
                       fuzzy=DAT$p6[DAT$humanist==0],
                       vce = "hc0", 
                       cluster = clustervar[DAT$humanist==0])
summary(rd.science)
# 
rd.humanities <- rdrobust(y=DAT$pollib[DAT$humanist==1], 
                          x=DAT$average[DAT$humanist==1], 
                          c=6, h=.2,
                          fuzzy=DAT$p6[DAT$humanist==1],
                          vce = "hc0", 
                          cluster = clustervar[DAT$humanist==1])
summary(rd.humanities)
sink()

##############
## TABLE E3 ##
##############
sink(file="TableE3.txt")
clustervar <- DAT$average
rd.parentalPID <- rdrobust(y=DAT$dev.party, x=DAT$average, 
                           c=6, h=.2, fuzzy=DAT$p6,
                           vce = "hc0", cluster = clustervar)
summary(rd.parentalPID)
sink()

###############
## FIGURE E2 ##
###############
pdf("FigureE2.pdf",
    height=5, width=5)
pairs(cbind(DAT$pollib, 10-DAT$ECON_LR, 10-DAT$left_right),
      labels=c("Cultural", "Economic", "Left/Right"), lower.panel=NULL)
dev.off()


##############
## TABLE E4 ##
##############
sink("TableE4.txt")
# creating table of parties and ideology scores
PSCORES.RAW <- cbind.data.frame(DAT$ownparty,
                                10-DAT$GAL_TAN,
                                10-DAT$ECON_LR, 
                                10-DAT$left_right)
names(PSCORES.RAW) <- c("Party",
                        "GALTAN",
                        "LRECON",
                        "LRGEN")
PSCORES <- unique(PSCORES.RAW)
PSCORES <- PSCORES[!is.na(PSCORES[,2]),]
PSCORES[,-1] <- round(PSCORES[,-1], 2)
PSCORES <- PSCORES[order(PSCORES$Party), ]
PSCORES

cat("\n")
cat("\n")
cat("Table to get N's")
cat("\n")
cat("\n")
simplified.party <- ifelse(DAT$ownparty %in% PSCORES$Party, DAT$ownparty, "Other/NA")
table(simplified.party)
sink()

##############
## TABLE E5 ##
##############
sink(file="TableE5.txt")
clustervar <- DAT$average
# NOTE: reversing direction of ECON_LR variable
rd.econ <- rdrobust(y=10-DAT$ECON_LR, x=DAT$average, 
                    c=6, h=.2, fuzzy=DAT$p6,
                    vce = "hc0", cluster = clustervar)
summary(rd.econ)
#
clustervar <- DAT$average
# NOTE: reversing direction of left_right variable
rd.rightleft <- rdrobust(y=10-DAT$left_right, x=DAT$average, 
                    c=6, h=.2, fuzzy=DAT$p6,
                    vce = "hc0", cluster = clustervar)
summary(rd.rightleft)
sink()

##############
## TABLE E6 ##
##############
sink(file="TableE6.txt")
## local rand (TSLS) results Econ
library(AER)
lm.tsls.econ <- ivreg(ECON_LR ~ p6 | average>=6,
                      data=DAT)
summary(lm.tsls.econ)
confint(lm.tsls.econ)
# tsls .15
lm.tsls.15.econ <- ivreg(ECON_LR ~ p6 | average>=6,
                         data=subset(DAT, abs(average-6)<=.15))
summary(lm.tsls.15.econ)
confint(lm.tsls.15.econ)
# tsls .1
lm.tsls.1.econ <- ivreg(ECON_LR ~ p6 | average>=6,
                        data=subset(DAT, abs(average-6)<=.1))
summary(lm.tsls.1.econ)
confint(lm.tsls.1.econ)
# tsls .05
lm.tsls.05.econ <- ivreg(ECON_LR ~ p6 | average>=6,
                         data=subset(DAT, abs(average-6)<=.05))
summary(lm.tsls.05.econ)
confint(lm.tsls.05.econ)
# tsls closest
lm.tsls.closest.econ <- ivreg(ECON_LR ~ p6 | average>=6,
                              data=subset(DAT, average==6 | average==5.983333))
summary(lm.tsls.closest.econ)
confint(lm.tsls.closest.econ)
#
# Left/Right Ideol LocRand (TSLS) results
lm.tsls.leftright <- ivreg(left_right ~ p6 | average>=6,
                           data=DAT)
summary(lm.tsls.leftright)
confint(lm.tsls.leftright)
# tsls .15
lm.tsls.15.leftright <- ivreg(left_right ~ p6 | average>=6,
                              data=subset(DAT, abs(average-6)<=.15))
summary(lm.tsls.15.leftright)
confint(lm.tsls.15.leftright)
# tsls .1
lm.tsls.1.leftright <- ivreg(left_right ~ p6 | average>=6,
                             data=subset(DAT, abs(average-6)<=.1))
summary(lm.tsls.1.leftright)
confint(lm.tsls.1.leftright)
# tsls .05
lm.tsls.05.leftright <- ivreg(left_right ~ p6 | average>=6,
                              data=subset(DAT, abs(average-6)<=.05))
summary(lm.tsls.05.leftright)
confint(lm.tsls.05.leftright)
# tsls closest
lm.tsls.closest.leftright <- ivreg(left_right ~ p6 | average>=6,
                                   data=subset(DAT, average==6 | average==5.983333))
summary(lm.tsls.closest.leftright)
confint(lm.tsls.closest.leftright)
sink()


###############
## FIGURE G1 ##
###############
library(rdrobust)
clustervar <- DAT$average
rd.pollib <- rdrobust(y=DAT$pollib, x=DAT$average, 
                      c=6, h=.2, fuzzy=DAT$p6,
                      vce = "hc0", cluster = clustervar)
summary(rd.pollib)

pdf("FigureG1.pdf",
    height=6, width=3)
par(mar=c(2,2,2,4))
plot(rd.pollib$coef[1], 
     pch=19, xlim=c(.5, 2.5), 
     bty="n", xlab="", xaxt="n", 
     ylab="", ylim=c(-.5, 1.5))
axis(side= 4, at=seq(-.5, 1.5, by=.5))
mtext("Estimated Effect", side=4, line=2)
segments(1, rd.pollib$ci[3,1], 1, rd.pollib$ci[3,2], lwd=2)
abline(h=0, lty=3)
# adding in WVS/EVS effect estimates
# hard coding results from TableG1.txt
points(2, .4619057, # NOTE: FLIPPING HERE b/c GALTAN REVERSED
       pch=19, col="gray")
segments(2, .3002654,
         2, .6235461,
         col="grey", lwd=2)
dev.off()